

** make a table of bounds

/*


load and collapse to weekly sums
save as tempory files
stitch together 


*/


********************************************************************************
********************************************************************************
** load and save all tests
********************************************************************************
********************************************************************************

** all tests

use health/test_daily_panel, clear
merge m:1 sid using health/demographics, keep(1 3) assert(2 3) ///
	nogen keepusing(sid age_wide)

	** aggregate 
	egen week = cut(date), at(`=mdy(3,13,2020)'(7)`=mdy(12,22,2020)')			
	drop if missing(week) 
	drop if missing(age_wide) | age_wide == 0
	
	** tested/positive at least once in this week?
	collapse (max) positive tested, by(week age_wide sid)	
	collapse (sum) tested positive, by(week age_wide)
	
	merge m:1 age_wide using data/age_wide_counts, assert(3) nogen

	gen test_rate = tested / n
	gen conf_pos_rate = positive / n
	gen pos_rate = positive / tested

	list age_wide n test_rate conf_pos_rate pos_rate if week == mdy(3,27,2020)
	
	
	** prepare group-specific variances for aggregation
	bysort week: egen tage = total(n)
	gen share_age_group = n/tage
	gen lb_variance_wtd = share_age_group*conf_pos_rate*(1-conf_pos_rate)/n
	gen ub_variance_wtd = share_age_group*pos_rate*(1-pos_rate)/tested

	collapse (mean) test_rate conf_pos_rate pos_rate ///
		(rawsum) n lb_variance_wtd ub_variance_wtd ///
		[fw=n], by(week)
	

	
	gen lb_se = sqrt(lb_variance_wtd)
	gen ub_se = sqrt(ub_variance_wtd)
	
	list if week == mdy(3,27,2020)

	format week %td
	list, noobs clean
	
	gen group ="All"
	save health/all_weekly_wtd, replace 
	


********************************************************************************
********************************************************************************
** load and save the hospital tests, by group
********************************************************************************
********************************************************************************
foreach group in  no_icli any_major any_icli pool_icli {
	
	
	* load the data
	use health/inp_clean if ~no_dx, clear
	if "`group'" == "no_icli" gen no_icli = any_icli == 0
	if "`group'" == "pool_icli" gen pool_icli = 1
	keep if `group' == 1

	merge m:1 sid using health/demographics, assert(2 3) keep(1 3) ///
		keepusing(sid age_wide) nogen
	drop if missing(age_wide) | age_wide == 0
	egen week = cut(admit_date), at(`=mdy(3,13,2020)'(7)`=mdy(12,22,2020)')		
	
	drop if missing(week)
	collapse (sum) test_tight pos_tight (count) n=test_tight, ///
		by(week age_wide)
	gen test_rate = test_tight / n
	gen conf_pos_rate = pos_tight / n
	gen pos_rate = pos_tight / test_tight 
	
	rename n n_hospitalizations 
	merge m:1 age_wide using data/age_wide_counts, assert(3)
	rename n n_age_group
	desc, full 
	
	noi list age_wide test_tight pos_tight n_hospitalizations n_age_group ///
		if week == 22050, noobs clean
	

	** prepare group-specific variances for aggregation
	bysort week: egen tage = total(n_age_group)
	gen share_age_group = n_age_group/tage
	gen lb_variance_wtd = share_age_group*conf_pos_rate*(1-conf_pos_rate)/n_hospitalizations
	gen ub_variance_wtd = share_age_group*pos_rate*(1-pos_rate)/test_tight

	collapse (mean) test_rate conf_pos_rate pos_rate ///
		(rawsum) n_hospitalizations lb_variance_wtd ub_variance_wtd ///
		[fw=n_age_group], by(week)
	rename n_hospitalizations n
	
	gen lb_se = sqrt(lb_variance_wtd)
	gen ub_se = sqrt(ub_variance_wtd)
	
	noi list conf_pos_rate pos_rate lb_se ub_se	if week == 22050, noobs clean
	
	gen group = "`group'"
	save health/`group'_weekly_wtd, replace

	format week %td
	format test_rate-pos_rate %4.3f
	noi list week test_rate conf_pos_rate pos_rate , noobs clean
	
}

********************************************************************************
********************************************************************************
** append together etc
********************************************************************************
********************************************************************************

clear
foreach group in all any_icli no_icli any_major pool_icli {
	append using health/`group'_weekly_wtd
}



gen groupn = 1 if group == "All"
replace groupn = 2 if group == "no_icli"
replace groupn = 3 if group == "any_major"
replace groupn = 4 if group == "any_icli"
replace groupn = 5 if group == "pool_icli"

rename conf_pos_rate lower_bound
rename pos_rate upper_bound

** find maximal difference
gen diff = upper_bound - lower_bound
sum diff if group == "All"
sum week if diff == r(min)
outsheet week group lower_bound upper_bound diff if week == r(mean) ///
	using tables/min_diff_week.csv, replace noquote 
list week if diff == r(min)
drop diff



** SEs for pop
gen lower_bound_ci = max(lower_bound - 1.96*lb_se, 0)
gen upper_bound_ci = upper_bound + 1.96*ub_se
drop lb_se ub_se lb_variance_wtd ub_variance_wtd

preserve
keep n test_rate group lower_bound upper_bound week groupn
reshape wide n test_rate group lower_bound upper_bound, i(week) j(groupn)
count
gen ci = 0
tempfile bounds
save `bounds', replace
restore
drop lower_bound upper_bound
rename lower_bound_ci lower_bound
rename upper_bound_ci upper_bound
reshape wide n test_rate group  lower_bound upper_bound, i(week) j(groupn)
gen ci  = 1
append using `bounds'
sort week ci 
	
gen tt =	""
foreach v of varlist test_rate* lower_bound* upper_bound* {
	local fmt %4.3f
	if strpos("`v'", "lower_bound") local fmt %5.4f
	replace tt = string(`v', "`fmt'") 
	
	** format for confidence intervals
	if strpos("`v'", "lower_bound") replace tt = "(" + tt + ", " if ci
	if strpos("`v'", "upper_bound") replace tt = tt +")" 				 if ci
	if strpos("`v'", "test_rate")   replace tt = ""              if ci
	
	drop `v'
	gen `v' = tt + " & " 
}

forvalues n = 1/5 {
	replace tt = string(n`n', "%10.0fc") + " & " 
	drop n`n'
	gen n`n' = tt 
	replace n`n' = " & " if ci 
}

gen month = month(week)
gen day = day(week)
gen year = year(week)
tostring month day year, replace
replace day = "0" + day if length(day)==1
gen date_str = month + "/" + day + "/" + year  + " & " 
replace date_str = "&" if ci
replace test_rate4 = subinstr(test_rate4, "&", "\\",.)
replace upper_bound4 = subinstr(upper_bound4, "&" , "\\", .)

list date_str test_rate1 n2 test_rate2 n3 test_rate3 n4 test_rate4 if ~ci, noobs clean	


outsheet date_str test_rate1 n2 test_rate2 n3 test_rate3 n4 test_rate4 if ~ci ///
	using tables/test_rate_by_group_wtd.tex, replace noquote nonames delimit(" ")
	
	
** 2: no icli vs. 5 (pool icli and not)
list date_str n2 test_rate2 n5 test_rate5 if ~ci, noobs clean	

	
list date_str lower_bound1 upper_bound1 lower_bound2 upper_bound2 ///
	lower_bound3 upper_bound3 lower_bound4 upper_bound4, noobs clean
	
	
replace upper_bound5 = subinstr(upper_bound5, "&" , "\\", .)
list date_str lower_bound2 upper_bound2 lower_bound1 upper_bound5 if ///
	strpos(date_str, "2020"), noobs clean
	
	
destring month, replace
outsheet date_str lower_bound2 upper_bound2 lower_bound1 upper_bound5 ///
	if month == 6 using tables/bounds_pool_hospitalizations.tex , ///
	replace noquote nonames delimit(" ")
	
list date_str lower_bound1 upper_bound1 lower_bound2 upper_bound2 in 1/3
ss
	
# delimit ;
outsheet date_str 
	lower_bound1 upper_bound1 
	lower_bound2 upper_bound2 
	lower_bound3 upper_bound3 
	lower_bound4 upper_bound4 
	if month < 8
	using tables/bounds_by_group_wtd_Mar_Jul.tex, /// 
	replace noquote nonames delimit(" ")
;
# delimit ;
outsheet date_str 
	lower_bound1 upper_bound1 
	lower_bound2 upper_bound2 
	lower_bound3 upper_bound3 
	lower_bound4 upper_bound4 
	if month >= 8
	using tables/bounds_by_group_wtd_Aug_Del.tex, /// 
	replace noquote nonames delimit(" ")
;
# delimit cr

	
	
clear
foreach group in all any_icli no_icli any_major {
	append using health/`group'_weekly_wtd
}
	
# delimit ;
twoway
	(line test_rate week if group == "All", color(navy) )
	(pci .03 `=mdy(12,4,2020)' .08 `=mdy(12,4,2020)', color(navy))
	
	(line test_rate week if group == "no_icli", color(maroon) )
	(pci .36 `=mdy(10,1,2020)' .29 `=mdy(10,1, 2020)', color(maroon))
	
	(line test_rate week if group == "any_major", color(maroon) lpattern(dash) )
	(pci .15 `=mdy(10,1,2020)' .22 `=mdy(10,1, 2020)', color(maroon))
	
	(line test_rate week if group == "any_icli", color(gs5))	
	(pci .74 `=mdy(9,13,2020)' .62 `=mdy(9,13,2020)', color(gs5))
	,
	xsize(9) ysize(6)
	ylabel(0(.2).8)
	legend(off)
	xtitle("")
	ytitle("")
	xlabel(`=mdy(3,13,2020)' "3/13"	`=mdy(4,13,2020)' "4/13" 
		`=mdy(5,13,2020)' "5/13" `=mdy(6,13,2020)' "6/13" `=mdy(7,13,2020)' "7/13"
		`=mdy(8,13,2020)' "8/13" `=mdy(9,13,2020)' "9/13" `=mdy(10,13,2020)' "10/13"
		`=mdy(11,13,2020)' "11/13" `=mdy(12,13,2020)' "12/13")
	graphregion(color(white))
	text(.76 `=mdy(8,22,2020)' "ICLI hospitalizations", size(small) 
		place(e) color(gs5))
	
	text(.38 `=mdy(10,1,2020)' "Non-ICLI hospitalizations", size(small)
		 color(maroon))	
	text(.14 `=mdy(10,1,2020)' "Clear cause hospitalizations", size(small) 
		color(maroon))
	
	text(.1 `=mdy(12,15,2020)' "Population", size(small) place(w) color(navy))
;
# delimit cr
	
graph export figures/tests_all_groups_wtd.pdf, replace 
	
# delimit ;
twoway
	(rarea pos_rate conf_pos_rate week if group == "All", color(navy%20) )
	(rarea pos_rate conf_pos_rate week if group == "no_icli", color(maroon%20) )	
	(rarea pos_rate conf_pos_rate week if group == "any_major", color(maroon%60))		
	(rarea pos_rate conf_pos_rate week if group == "any_icli", color(gs5%25) )
	
	
	(line pos_rate week if group == "no_icli", color(maroon) 
		lpattern(dash))
	(line conf_pos_rate week if group == "no_icli", color(maroon) 
		lpattern(dash))
		
	,
	
	xlabel(`=mdy(3,13,2020)' "3/13"	`=mdy(4,13,2020)' "4/13" 
		`=mdy(5,13,2020)' "5/13" `=mdy(6,13,2020)' "6/13" `=mdy(7,13,2020)' "7/13"
		`=mdy(8,13,2020)' "8/13" `=mdy(9,13,2020)' "9/13" `=mdy(10,13,2020)' "10/13"
		`=mdy(11,13,2020)' "11/13" `=mdy(12,13,2020)' "12/13")
		
	text(.15 `=mdy(12,11,2020)' "Population", color(navy) place(e)  size(small))
	text(.35 `=mdy(12,11,2020)' "ICLI" , 
		color(gs5) place(e)  size(small))
	text(.04 `=mdy(12,11,2020)' "Non-ICLI" , 
		place(e) color(maroon) size(small)) 
	text(.08 `=mdy(12,11,2020)' "Clear cause" , 
		place(e) justification(left) 
		color(maroon) size(small)) 
	xsize(9) ysize(6)
	legend(off)
	graphregion(color(white) margin(r+10))
	xtitle("")
	ytitle("")
	
;
# delimit cr
	
graph export figures/bounds_all_groups_wtd.pdf, replace 



gen ci_lower = max(0,conf_pos_rate-1.96*lb_se )
gen ci_upper = pos_rate + 1.96*ub_se



foreach g in  any_icli any_major no_icli {

	if "`g'" == "any_icli" {
		local title "A. ICLI Hospitalizations"
		local color gs5	
	}
	if "`g'" == "no_icli" {
		local title "B. Non-ICLI Hospitalizations"
		local color maroon
	}
	if "`g'" == "any_major" {
		local title "C. Clear Cause Hospitalizations"
		local color maroon	
	}

	# delimit ;
	twoway
	
		(rarea pos_rate conf_pos_rate week if group == "All", color(navy%50) )		
		(line ci_lower week if group == "`g'", color(navy) lpattern(dash))
		(line ci_upper week if group == "`g'", color(navy) lpattern(dash))
	
		(rarea pos_rate conf_pos_rate week if group == "`g'", color(`color'%50) )		
		(line ci_lower week if group == "`g'", color(`color') lpattern(dash))
		(line ci_upper week if group == "`g'", color(`color') lpattern(dash))
			
		,
		
		xlabel(`=mdy(3,13,2020)' "3/13" `=mdy(6,13,2020)' "6/13"  
			`=mdy(9,13,2020)' "9/13" `=mdy(12,13,2020)' "12/13", labsize(medium) )
		ylabel(0(.2).6, labsize(medium))
		xsize(5) ysize(3)
		legend(off)
		xtitle("")
		ytitle("")
		graphregion(color(white) )
		title("`title'", size(medium) pos(11) ring(0))
		name(`g', replace)
		
	;
	# delimit cr

}

graph combine any_icli no_icli any_major, cols(1) iscale(1) ///
	graphregion(color(white)) xsize(5) ysize(9)
	
graph export figures/bounds_all_groups_wtd_ci.pdf, replace 
